

sink("./PSRM Replication Files/California/AVR_CA18_log.txt", append=F, type="output")

# Replication File for Seljan and Gronke, 2022, "Happy Birthday You Get
# To Vote". 
#
# ANALYSIS: California AVR analysis, CA 2018 election
#
#devtools::install_github("strengejacke/sjPlot")
#devtools::install_github("ekhartman/equivtest")
library(tidyverse)
library(devtools)
library(lubridate)
library(margins)
library(equivtest)
library(sjPlot)
library(doBy)
library(mosaic)
library(AER)
library(splitstackshape)
library(stargazer)


### ADDITIONAL DATA SET UP

CA_file<-readRDS("./PSRM Replication Files/California/CA2018_anon.rds")


# read birthdates and registration dates as lubridates

CA_file$lub_birthdate<-ymd(CA_file$DOB)
CA_file$birth_year<-year(CA_file$lub_birthdate)
CA_file$birth_day_of_year<-yday(CA_file$lub_birthdate)
CA_file$lub_regdate<-ymd(CA_file$RegistrationDate)
CA_file$reg_year<-year(CA_file$lub_regdate)
CA_file$reg_day_of_year<-yday(CA_file$lub_regdate)
CA_file$birthday_string<-str_sub(CA_file$DOB, 1, 5)

# Registration year variables

CA_file$regyear2019<-ifelse(CA_file$reg_year==2019, 1, 0)
CA_file$regyear2018<-ifelse(CA_file$reg_year==2018, 1, 0)
CA_file$regyear2017<-ifelse(CA_file$reg_year==2017, 1, 0)
CA_file$regyear2016<-ifelse(CA_file$reg_year==2016, 1, 0)
CA_file$regyear2015<-ifelse(CA_file$reg_year==2015, 1, 0)
CA_file$regyear2014<-ifelse(CA_file$reg_year==2014, 1, 0)

# Create objects for notable dates/VEP

electionday2018<-yday(ymd(181106))
deadlineNov2018<-yday(ymd(181022))
VEP=25635139


# Compute age at November 2016 General Election accounting for leap years
CA_file <- CA_file  %>%
  mutate(age_at_2018election = case_when(birth_day_of_year+1<=electionday2018 & leap_year(lub_birthdate)==TRUE ~ 2018-birth_year, 
                                         birth_day_of_year<=electionday2018 & leap_year(lub_birthdate)==FALSE ~ 2018-birth_year, 
                                         birth_day_of_year+1>electionday2018 & leap_year(lub_birthdate)==TRUE ~ 2018-birth_year-1,
                                         TRUE ~ 2018-birth_year-1))


# Limit to ages 18-110

CA_file<-CA_file %>%
  filter(age_at_2018election>=18 & age_at_2018election<=110)


# Compute difference between registration and birth dates. 

CA_file$doydiff<-CA_file$reg_day_of_year-CA_file$birth_day_of_year

CA_file<-CA_file %>% 
  mutate(doydifference365=case_when(doydiff<(-182) ~ doydiff+365, 
                                    doydiff>182 ~ doydiff-365,
                                    TRUE ~ as.numeric(doydiff)
                                    
  ))



# Create Dummies for party status

CA_file$democrat<-ifelse(CA_file$PartyCode=="DEM",1,0)
CA_file$republican<-ifelse(CA_file$PartyCode=="REP",1,0)
CA_file<-CA_file %>% mutate(third_party=case_when(PartyCode=="REP" ~ 0, 
                                                  PartyCode=="DEM" ~ 0,
                                                  PartyCode=="NPP" ~ 0,
                                                  TRUE~1))

# Create instrument (Pre-registration deadline birthday) 

CA_file <- CA_file  %>%
  mutate(bday_preOct22 = case_when(birth_day_of_year>deadlineNov2018+1 & leap_year(lub_birthdate)==TRUE ~ 0, 
                                   birth_day_of_year>deadlineNov2018 & leap_year(lub_birthdate)==FALSE ~ 0, 
                                   TRUE ~ 1))


# Creates Registration during AVR dummy and Instrumented variable (registered by deadline ). 

CA_file$reg_duringAVR<-ifelse(CA_file$lub_regdate>=mdy(042318) & CA_file$lub_regdate<=mdy(123118),1,0)

CA_file$reg_preOct22<-ifelse(CA_file$lub_regdate<=mdy(102218),1,0)


# Limit data to birthday windows and those registered during AVR implementation. Coding accounts for leap years. 

CA_file <- CA_file  %>%
  mutate(samplewindow = case_when(birth_day_of_year<deadlineNov2018+1-15 & leap_year(lub_birthdate)==TRUE ~ 0, 
                                  birth_day_of_year<deadlineNov2018-15 & leap_year(lub_birthdate)==FALSE ~ 0, 
                                  birth_day_of_year>electionday2018+1 & leap_year(lub_birthdate)==TRUE ~ 0,
                                  birth_day_of_year>electionday2018 & leap_year(lub_birthdate)==FALSE ~ 0,
                                  TRUE ~ 1))


# Is registration withing 30 days of birthdate?
CA_file$doydiffby30<-ifelse(CA_file$doydifference365<=30 &CA_file$doydifference365>=-30,1,0)


# Dummy variable for AVR versus traditional (AVR)

CA_file$AVR<-ifelse(CA_file$RegistrationMethodCode=="DL44" | CA_file$RegistrationMethodCode=="RBM", 1, 0)


# Dummy variable that tags new voters (new18_v3)

CA_file<-CA_file %>%
  mutate(new18_v3=ifelse(VoterStatusReasonCodeDesc=="New Registration" & lub_regdate>=mdy(042318) & vote_yes2016==0,1,0))

# Dummy variables that tag age groups (age1823, age2430, age3140, age4160,age61plus)

CA_file<-CA_file  %>%
  mutate(age1823=ifelse(age_at_2018election>=18 & age_at_2018election<=23, 1, 0),
         age2430=ifelse(age_at_2018election>=24 & age_at_2018election<=30, 1, 0),
         age3140=ifelse(age_at_2018election>=31 & age_at_2018election<=40, 1, 0),
         age4160=ifelse(age_at_2018election>=41 & age_at_2018election<=60, 1, 0),
         age61plus=ifelse(age_at_2018election>=61, 1, 0))



# Data frame exclusive for those registered post AVR within birthday window

sample_CAbday_reg_duringAVR<-CA_file %>%
  filter(reg_duringAVR==1, samplewindow==1)


### FIGURE 1 (California Component)
CA_file %>%
  mutate(AVR_label=ifelse(AVR==1, "AVR", "Traditional")) %>%
  filter(reg_duringAVR==1) %>%
  ggplot(aes(doydifference365))+geom_histogram(binwidth = 5, aes(y=..density..))+
  facet_grid(~AVR_label)+ggtitle("2018 California Registrants")+ 
  xlab("Difference between Birth Date and Registration Date")+ 
  geom_vline(xintercept = 0) + theme_minimal() +   
  scale_x_continuous(limits=c(-182,182), breaks=c(-182, -91, 0, 91, 182)) +
  scale_y_continuous(limits=c(0,0.015), breaks=c(0,0.005, 0.01))

ggsave("./PSRM Replication Files/figures/figure_1_CA.png")





### TABLE 1 (California Component) & Marginal Effects in text


CA_logit_doydiff<-glm(formula = doydiffby30 ~ AVR, family = "binomial", data = subset(CA_file, reg_year == 2018))
summary(CA_logit_doydiff, diagnostics=T)
effects_logit_doydiffCA2016 <- margins(CA_logit_doydiff) 
mean(effects_logit_doydiffCA2016$fitted, na.rm=TRUE)

saveRDS(CA_logit_doydiff, file="./PSRM Replication Files/California/Object Files/CA_logit_doydiff.rds")

# Code to display modal date difference between birthday and registration for AVR Voters

AVR_Voters<-CA_file %>%
  filter(AVR==1)  

Mode <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

Mode(AVR_Voters$doydifference365)



#### Equivalence tests (Figure 2)

# Tests for full calendar year

reg_duringAVR<-CA_file %>%
  filter(reg_duringAVR==1)

dem0<-reg_duringAVR$democrat[reg_duringAVR$bday_preOct22==0]
dem1<-reg_duringAVR$democrat[reg_duringAVR$bday_preOct22==1]
dem_equiv<-fisher.binom.two.sided(dem0, dem1, eps_sub=.01, alpha=0.05)
dem_equiv<-c(dem_equiv$odds.ratio, dem_equiv$equiv.CI[1], dem_equiv$equiv.CI[2], "Democrat")

rep0<-reg_duringAVR$republican[reg_duringAVR$bday_preOct22==0]
rep1<-reg_duringAVR$republican[reg_duringAVR$bday_preOct22==1]
rep_equiv<-fisher.binom.two.sided(rep0, rep1, eps_sub=.01, alpha=0.05)
rep_equiv<-c(rep_equiv$odds.ratio, rep_equiv$equiv.CI[1], rep_equiv$equiv.CI[2], "Republican")

third0<-reg_duringAVR$third_party[reg_duringAVR$bday_preOct22==0]
third1<-reg_duringAVR$third_party[reg_duringAVR$bday_preOct22==1]
third_equiv<-fisher.binom.two.sided(third0, third1, eps_sub=.01, alpha=0.05)
third_equiv<-c(third_equiv$odds.ratio, third_equiv$equiv.CI[1], third_equiv$equiv.CI[2], "Third Party")

white0<-reg_duringAVR$white[reg_duringAVR$bday_preOct22==0]
white1<-reg_duringAVR$white[reg_duringAVR$bday_preOct22==1]
white_equiv<-fisher.binom.two.sided(white0, white1, eps_sub=.01, alpha=0.05)
white_equiv<-c(white_equiv$odds.ratio, white_equiv$equiv.CI[1], white_equiv$equiv.CI[2], "White")

black0<-reg_duringAVR$black[reg_duringAVR$bday_preOct22==0]
black1<-reg_duringAVR$black[reg_duringAVR$bday_preOct22==1]
black_equiv<-fisher.binom.two.sided(black0, black1, eps_sub=.01, alpha=0.05)
black_equiv<-c(black_equiv$odds.ratio, black_equiv$equiv.CI[1], black_equiv$equiv.CI[2], "Black")

hispanic0<-reg_duringAVR$hispanic[reg_duringAVR$bday_preOct22==0]
hispanic1<-reg_duringAVR$hispanic[reg_duringAVR$bday_preOct22==1]
hispanic_equiv<-fisher.binom.two.sided(hispanic0, hispanic1, eps_sub=.01, alpha=0.05)
hispanic_equiv<-c(hispanic_equiv$odds.ratio, hispanic_equiv$equiv.CI[1], hispanic_equiv$equiv.CI[2], "Hispanic")

asian0<-reg_duringAVR$asian[reg_duringAVR$bday_preOct22==0]
asian1<-reg_duringAVR$asian[reg_duringAVR$bday_preOct22==1]
asian_equiv<-fisher.binom.two.sided(asian0, asian1, eps_sub=.01, alpha=0.05)
asian_equiv<-c(asian_equiv$odds.ratio, asian_equiv$equiv.CI[1], asian_equiv$equiv.CI[2], "Asian")

age18230<-reg_duringAVR$age1823[reg_duringAVR$bday_preOct22==0]
age18231<-reg_duringAVR$age1823[reg_duringAVR$bday_preOct22==1]
age1823_equiv<-fisher.binom.two.sided(age18230, age18231, eps_sub=.01, alpha=0.05)
age1823_equiv<-c(age1823_equiv$odds.ratio, age1823_equiv$equiv.CI[1], age1823_equiv$equiv.CI[2], "Age 18-23")

age24300<-reg_duringAVR$age2430[reg_duringAVR$bday_preOct22==0]
age24301<-reg_duringAVR$age2430[reg_duringAVR$bday_preOct22==1]
age2430_equiv<-fisher.binom.two.sided(age24300, age24301, eps_sub=.01, alpha=0.05)
age2430_equiv<-c(age2430_equiv$odds.ratio, age2430_equiv$equiv.CI[1], age2430_equiv$equiv.CI[2], "Age 24-30")

age31400<-reg_duringAVR$age3140[reg_duringAVR$bday_preOct22==0]
age31401<-reg_duringAVR$age3140[reg_duringAVR$bday_preOct22==1]
age3140_equiv<-fisher.binom.two.sided(age31400, age31401, eps_sub=.01, alpha=0.05)
age3140_equiv<-c(age3140_equiv$odds.ratio, age3140_equiv$equiv.CI[1], age3140_equiv$equiv.CI[2], "Age 31-40")

age41600<-reg_duringAVR$age4160[reg_duringAVR$bday_preOct22==0]
age41601<-reg_duringAVR$age4160[reg_duringAVR$bday_preOct22==1]
age4160_equiv<-fisher.binom.two.sided(age41600, age41601, eps_sub=.01, alpha=0.05)
age4160_equiv<-c(age4160_equiv$odds.ratio, age4160_equiv$equiv.CI[1], age4160_equiv$equiv.CI[2], "Age 41-60")

age61plus0<-reg_duringAVR$age61plus[reg_duringAVR$bday_preOct22==0]
age61plus1<-reg_duringAVR$age61plus[reg_duringAVR$bday_preOct22==1]
age61plus_equiv<-fisher.binom.two.sided(age61plus0, age61plus1, eps_sub=.01, alpha=0.05)
age61plus_equiv<-c(age61plus_equiv$odds.ratio, age61plus_equiv$equiv.CI[1], age61plus_equiv$equiv.CI[2], "Age 61+")

data_equiv<-rbind(third_equiv,  rep_equiv, dem_equiv, asian_equiv, black_equiv, hispanic_equiv, white_equiv,  age61plus_equiv, age4160_equiv,  age3140_equiv, age2430_equiv, age1823_equiv)
data_equiv<-data.frame(data_equiv)
data_equiv<-data_equiv %>%
  rename(estimate=X1, lower=X2, upper=X3, name=X4)

data_equiv$estimate<-as.numeric(data_equiv$estimate) 
data_equiv$lower<-as.numeric(data_equiv$lower) 
data_equiv$upper<-as.numeric(data_equiv$upper) 
data_equiv$year<-"CA 2018"
data_equiv$full<-"Full Implementation Period"

equivalence_tests<-data_equiv



# Sample window only tests 


dem0<-sample_CAbday_reg_duringAVR$democrat[sample_CAbday_reg_duringAVR$bday_preOct22==0]
dem1<-sample_CAbday_reg_duringAVR$democrat[sample_CAbday_reg_duringAVR$bday_preOct22==1]
dem_equiv<-fisher.binom.two.sided(dem0, dem1, eps_sub=.01, alpha=0.05)
dem_equiv<-c(dem_equiv$odds.ratio, dem_equiv$equiv.CI[1], dem_equiv$equiv.CI[2], "Democrat")

rep0<-sample_CAbday_reg_duringAVR$republican[sample_CAbday_reg_duringAVR$bday_preOct22==0]
rep1<-sample_CAbday_reg_duringAVR$republican[sample_CAbday_reg_duringAVR$bday_preOct22==1]
rep_equiv<-fisher.binom.two.sided(rep0, rep1, eps_sub=.01, alpha=0.05)
rep_equiv<-c(rep_equiv$odds.ratio, rep_equiv$equiv.CI[1], rep_equiv$equiv.CI[2], "Republican")

third0<-sample_CAbday_reg_duringAVR$third_party[sample_CAbday_reg_duringAVR$bday_preOct22==0]
third1<-sample_CAbday_reg_duringAVR$third_party[sample_CAbday_reg_duringAVR$bday_preOct22==1]
third_equiv<-fisher.binom.two.sided(third0, third1, eps_sub=.01, alpha=0.05)
third_equiv<-c(third_equiv$odds.ratio, third_equiv$equiv.CI[1], third_equiv$equiv.CI[2], "Third Party")

white0<-sample_CAbday_reg_duringAVR$white[sample_CAbday_reg_duringAVR$bday_preOct22==0]
white1<-sample_CAbday_reg_duringAVR$white[sample_CAbday_reg_duringAVR$bday_preOct22==1]
white_equiv<-fisher.binom.two.sided(white0, white1, eps_sub=.01, alpha=0.05)
white_equiv<-c(white_equiv$odds.ratio, white_equiv$equiv.CI[1], white_equiv$equiv.CI[2], "White")

black0<-sample_CAbday_reg_duringAVR$black[sample_CAbday_reg_duringAVR$bday_preOct22==0]
black1<-sample_CAbday_reg_duringAVR$black[sample_CAbday_reg_duringAVR$bday_preOct22==1]
black_equiv<-fisher.binom.two.sided(black0, black1, eps_sub=.01, alpha=0.05)
black_equiv<-c(black_equiv$odds.ratio, black_equiv$equiv.CI[1], black_equiv$equiv.CI[2], "Black")

hispanic0<-sample_CAbday_reg_duringAVR$hispanic[sample_CAbday_reg_duringAVR$bday_preOct22==0]
hispanic1<-sample_CAbday_reg_duringAVR$hispanic[sample_CAbday_reg_duringAVR$bday_preOct22==1]
hispanic_equiv<-fisher.binom.two.sided(hispanic0, hispanic1, eps_sub=.01, alpha=0.05)
hispanic_equiv<-c(hispanic_equiv$odds.ratio, hispanic_equiv$equiv.CI[1], hispanic_equiv$equiv.CI[2], "Hispanic")

asian0<-sample_CAbday_reg_duringAVR$asian[sample_CAbday_reg_duringAVR$bday_preOct22==0]
asian1<-sample_CAbday_reg_duringAVR$asian[sample_CAbday_reg_duringAVR$bday_preOct22==1]
asian_equiv<-fisher.binom.two.sided(asian0, asian1, eps_sub=.01, alpha=0.05)
asian_equiv<-c(asian_equiv$odds.ratio, asian_equiv$equiv.CI[1], asian_equiv$equiv.CI[2], "Asian")

age18230<-sample_CAbday_reg_duringAVR$age1823[sample_CAbday_reg_duringAVR$bday_preOct22==0]
age18231<-sample_CAbday_reg_duringAVR$age1823[sample_CAbday_reg_duringAVR$bday_preOct22==1]
age1823_equiv<-fisher.binom.two.sided(age18230, age18231, eps_sub=.01, alpha=0.05)
age1823_equiv<-c(age1823_equiv$odds.ratio, age1823_equiv$equiv.CI[1], age1823_equiv$equiv.CI[2], "Age 18-23")


age24300<-sample_CAbday_reg_duringAVR$age2430[sample_CAbday_reg_duringAVR$bday_preOct22==0]
age24301<-sample_CAbday_reg_duringAVR$age2430[sample_CAbday_reg_duringAVR$bday_preOct22==1]
age2430_equiv<-fisher.binom.two.sided(age24300, age24301, eps_sub=.01, alpha=0.05)
age2430_equiv<-c(age2430_equiv$odds.ratio, age2430_equiv$equiv.CI[1], age2430_equiv$equiv.CI[2], "Age 24-30")

age31400<-sample_CAbday_reg_duringAVR$age3140[sample_CAbday_reg_duringAVR$bday_preOct22==0]
age31401<-sample_CAbday_reg_duringAVR$age3140[sample_CAbday_reg_duringAVR$bday_preOct22==1]
age3140_equiv<-fisher.binom.two.sided(age31400, age31401, eps_sub=.01, alpha=0.05)
age3140_equiv<-c(age3140_equiv$odds.ratio, age3140_equiv$equiv.CI[1], age3140_equiv$equiv.CI[2], "Age 31-40")

age41600<-sample_CAbday_reg_duringAVR$age4160[sample_CAbday_reg_duringAVR$bday_preOct22==0]
age41601<-sample_CAbday_reg_duringAVR$age4160[sample_CAbday_reg_duringAVR$bday_preOct22==1]
age4160_equiv<-fisher.binom.two.sided(age41600, age41601, eps_sub=.01, alpha=0.05)
age4160_equiv<-c(age4160_equiv$odds.ratio, age4160_equiv$equiv.CI[1], age4160_equiv$equiv.CI[2], "Age 41-60")

age61plus0<-sample_CAbday_reg_duringAVR$age61plus[sample_CAbday_reg_duringAVR$bday_preOct22==0]
age61plus1<-sample_CAbday_reg_duringAVR$age61plus[sample_CAbday_reg_duringAVR$bday_preOct22==1]
age61plus_equiv<-fisher.binom.two.sided(age61plus0, age61plus1, eps_sub=.01, alpha=0.05)
age61plus_equiv<-c(age61plus_equiv$odds.ratio, age61plus_equiv$equiv.CI[1], age61plus_equiv$equiv.CI[2], "Age 61+")

data_equiv<-rbind(third_equiv,  rep_equiv, dem_equiv, asian_equiv, black_equiv, hispanic_equiv, white_equiv,  age61plus_equiv, age4160_equiv,  age3140_equiv, age2430_equiv, age1823_equiv)
data_equiv<-data.frame(data_equiv)
data_equiv<-data_equiv %>%
  rename(estimate=X1, lower=X2, upper=X3, name=X4)

data_equiv$estimate<-as.numeric(data_equiv$estimate) 
data_equiv$lower<-as.numeric(data_equiv$lower) 
data_equiv$upper<-as.numeric(data_equiv$upper) 
data_equiv$year<-"CA 2018"
data_equiv$full<-"Birthday Window"

equivalence_tests<-rbind(equivalence_tests, data_equiv)

equivalence_tests$name <- factor(equivalence_tests$name, levels=unique(equivalence_tests$name))

saveRDS(equivalence_tests, file="./PSRM Replication Files/California/Object Files/CA_equiv.rds")


### Test for differential registration bias
votersbybirthdate_sample<- sample_CAbday_reg_duringAVR %>%
  group_by(birth_day_of_year) %>%
  summarise(sumvoters=n(), Oct22=max(bday_preOct22))

##this is a copy of US_births_1994-2003_CDC_NCHS.csv from 538’s Github
births <- read_csv(file="https:/raw.githubusercontent.com/fivethirtyeight/data/master/births/US_births_1994-2003_CDC_NCHS.csv")

##sum of births by birthdate
birthsbyyear<-summaryBy(births ~ month + date_of_month, data=births, FUN=sum)

birthsbyyear<-birthsbyyear %>%
  mutate(birth_day_of_year = dplyr::row_number())

votersbybirthdate_sample<-left_join(votersbybirthdate_sample, birthsbyyear, by="birth_day_of_year")
votersbybirthdate_sample$prop_voters<-votersbybirthdate_sample$sumvoters/votersbybirthdate_sample$births.sum
reg_balance_sample<-t.test(prop_voters~Oct22, data=votersbybirthdate_sample)
reg_balance_sample

votersbybirthdate_full<- reg_duringAVR %>%
  group_by(birth_day_of_year) %>%
  summarise(sumvoters=n(), Oct22=max(bday_preOct22))

votersbybirthdate_full<-left_join(votersbybirthdate_full, birthsbyyear, by="birth_day_of_year")
votersbybirthdate_full$prop_voters<-votersbybirthdate_full$sumvoters/votersbybirthdate_full$births.sum
reg_balance_full<-t.test(prop_voters~Oct22, data=votersbybirthdate_full)
reg_balance_full



### Table 3 (California Component)

# 1 = Wald Estimation for subset registered during AVR in birthday window

table(sample_CAbday_reg_duringAVR$bday_preOct22)
prop1_1<-prop.test(reg_preOct22~bday_preOct22, data=sample_CAbday_reg_duringAVR, success=1)
firststage1<-prop1_1$estimate[2] - prop1_1$estimate[1]
prop2_1<-prop.test(vote_yes2018~bday_preOct22, data=sample_CAbday_reg_duringAVR, success=1)
effect1<-prop2_1$estimate[2] - prop2_1$estimate[1]
wald1<-effect1/firststage1
prop1_1
firststage1
prop2_1
effect1
wald1
iv1_wald<-ivreg(vote_yes2018 ~ reg_preOct22  | bday_preOct22, data=sample_CAbday_reg_duringAVR)
summary(iv1_wald, diagnostics = TRUE)


### Table 4

deadlineNov2014<-yday(ymd(20141020))
CA_file$reg2014_preOct20<-ifelse(CA_file$reg_day_of_year<=deadlineNov2014 & CA_file$reg_year==2014,1,0)

CA_file %>%
  filter(reg_year==2014 & samplewindow==1 & age_at_2018election>=22) %>%
  group_by(bday_preOct22) %>%
  count()

prop1_placebo<-prop.test(reg2014_preOct20~bday_preOct22, data=subset(CA_file, reg_year==2014 & samplewindow==1 & age_at_2018election>=22), success=1)
firststage_placebo<-prop1_placebo$estimate[2] - prop1_placebo$estimate[1]
prop2_placebo<-prop.test(vote_yes2014~bday_preOct22, data=subset(CA_file, reg_year==2014 & samplewindow==1 & age_at_2018election>=22), success=1)
effect_placebo<-prop2_placebo$estimate[2] - prop2_placebo$estimate[1]
wald1_placebo<-effect_placebo/firststage_placebo

prop1_placebo
firststage_placebo
prop2_placebo
effect_placebo
wald1_placebo

ivwaldplacebo<-ivreg(formula = vote_yes2014 ~ reg2014_preOct20  |
                bday_preOct22 , data = subset(CA_file, reg_year==2014 &
                                                samplewindow==1 & age_at_2018election>=22))

summary(ivwaldplacebo)


# TABLE 5 Instrumental variable regressions 

iv1<-ivreg(formula = vote_yes2018 ~ reg_preOct22 + populous_county + 
             white + black + hispanic + asian + democrat + republican + 
             third_party + age_at_2018election + female + nogender | bday_preOct22 + 
             populous_county + white + black + hispanic + asian + democrat + 
             republican + third_party + age_at_2018election + female + 
             nogender, data = sample_CAbday_reg_duringAVR)

ivnew<-ivreg(formula = vote_yes2018 ~ reg_preOct22 + populous_county + 
               black + white + asian + hispanic + democrat + republican + 
               third_party + age_at_2018election + female + nogender | bday_preOct22 + 
               populous_county + black + white + asian + hispanic + democrat + 
               republican + third_party + age_at_2018election + female + 
               nogender, data = subset(sample_CAbday_reg_duringAVR, new18_v3 == 
                                         1))

iv1.sum<- summary(iv1, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
ivnew.sum<- summary(ivnew, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
iv1.rob<- coeftest(iv1, function(x) vcovHC(x, type="HC0"))
ivnew.rob<- coeftest(ivnew, function(x) vcovHC(x, type="HC0"))

iv1.sum
ivnew.sum

save(iv1, ivnew, iv1.rob, ivnew.rob, ivnew.sum, iv1.sum, file = "./PSRM Replication Files/California/Object Files/CA iv main robust.RData")

# TABLE 6 Subgroup Regressions

iv18_23<- ivreg(formula = vote_yes2018 ~ reg_preOct22 + populous_county + 
                  black + white + asian + hispanic + democrat + republican + 
                  third_party + age_at_2018election + female + nogender | bday_preOct22 + 
                  populous_county + black + white + asian + hispanic + democrat + 
                  republican + third_party + age_at_2018election + female + 
                  nogender, data = subset(sample_CAbday_reg_duringAVR, new18_v3 == 1 & 
                                            age1823==1))
iv24_30<-ivreg(formula = vote_yes2018 ~ reg_preOct22 + populous_county + 
                 black + white + asian + hispanic + democrat + republican + 
                 third_party + age_at_2018election + female + nogender | bday_preOct22 + 
                 populous_county + black + white + asian + hispanic + democrat + 
                 republican + third_party + age_at_2018election + female + 
                 nogender, data = subset(sample_CAbday_reg_duringAVR, new18_v3 == 1 & 
                                           age2430==1))
iv31_40<- ivreg(formula = vote_yes2018 ~ reg_preOct22 + populous_county + 
                  black + white + asian + hispanic + democrat + republican + 
                  third_party + age_at_2018election + female + nogender | bday_preOct22 + 
                  populous_county + black + white + asian + hispanic + democrat + 
                  republican + third_party + age_at_2018election + female + 
                  nogender, data = subset(sample_CAbday_reg_duringAVR, new18_v3 == 1 & 
                                            age3140==1))
iv41_60<- ivreg(formula = vote_yes2018 ~ reg_preOct22 + populous_county + 
                  black + white + asian + hispanic + democrat + republican + 
                  third_party + age_at_2018election + female + nogender | bday_preOct22 + 
                  populous_county + black + white + asian + hispanic + democrat + 
                  republican + third_party + age_at_2018election + female + 
                  nogender, data = subset(sample_CAbday_reg_duringAVR, new18_v3 == 1 &
                                            age4160==1))
iv61plus<- ivreg(formula = vote_yes2018 ~ reg_preOct22 + populous_county + 
                   black + white + asian + hispanic + democrat + republican + 
                   third_party + age_at_2018election + female + nogender | bday_preOct22 + 
                   populous_county + black + white + asian + hispanic + democrat + 
                   republican + third_party + age_at_2018election + female + 
                   nogender, data = subset(sample_CAbday_reg_duringAVR, new18_v3 == 1 & 
                                             age61plus==1))
ivfemale<- ivreg(formula = vote_yes2018 ~ reg_preOct22 + populous_county + 
                   black + white + asian + hispanic + democrat + republican + 
                   third_party + age_at_2018election + female + nogender | bday_preOct22 + 
                   populous_county + black + white + asian + hispanic + democrat + 
                   republican + third_party + age_at_2018election + female + 
                   nogender, data = subset(sample_CAbday_reg_duringAVR, new18_v3 == 1 & 
                                             female==1))
ivmale<- ivreg(formula = vote_yes2018 ~ reg_preOct22 + populous_county + 
                 black + white + asian + hispanic + democrat + republican + 
                 third_party + age_at_2018election + female + nogender | bday_preOct22 + 
                 populous_county + black + white + asian + hispanic + democrat + 
                 republican + third_party + age_at_2018election + female + 
                 nogender, data = subset(sample_CAbday_reg_duringAVR, new18_v3 == 1 & 
                                           male==1))
ivpop<- ivreg(formula = vote_yes2018 ~ reg_preOct22 + populous_county + 
                black + white + asian + hispanic + democrat + republican + 
                third_party + age_at_2018election + female + nogender | bday_preOct22 + 
                populous_county + black + white + asian + hispanic + democrat + 
                republican + third_party + age_at_2018election + female + 
                nogender, data = subset(sample_CAbday_reg_duringAVR, new18_v3 == 1 &
                                          populous_county==1))
ivnonpop<- ivreg(formula = vote_yes2018 ~ reg_preOct22 + populous_county + 
                   black + white + asian + hispanic + democrat + republican + 
                   third_party + age_at_2018election + female + nogender | bday_preOct22 + 
                   populous_county + black + white + asian + hispanic + democrat + 
                   republican + third_party + age_at_2018election + female + 
                   nogender, data = subset(sample_CAbday_reg_duringAVR, new18_v3 == 1 & 
                                             populous_county==0))
ivwhite<- ivreg(formula = vote_yes2018 ~ reg_preOct22 + populous_county + 
                  black + white + asian + hispanic + democrat + republican + 
                  third_party + age_at_2018election + female + nogender | bday_preOct22 + 
                  populous_county + black + white + asian + hispanic + democrat + 
                  republican + third_party + age_at_2018election + female + 
                  nogender, data = subset(sample_CAbday_reg_duringAVR, new18_v3 == 1 & 
                                            white==1))
ivblack<- ivreg(formula = vote_yes2018 ~ reg_preOct22 + populous_county + 
                  black + white + asian + hispanic + democrat + republican + 
                  third_party + age_at_2018election + female + nogender | bday_preOct22 + 
                  populous_county + black + white + asian + hispanic + democrat + 
                  republican + third_party + age_at_2018election + female + 
                  nogender, data = subset(sample_CAbday_reg_duringAVR, new18_v3 == 1 & 
                                            black==1))
ivhispanic<- ivreg(formula = vote_yes2018 ~ reg_preOct22 + populous_county + 
                     black + white + asian + hispanic + democrat + republican + 
                     third_party + age_at_2018election + female + nogender | bday_preOct22 + 
                     populous_county + black + white + asian + hispanic + democrat + 
                     republican + third_party + age_at_2018election + female + 
                     nogender, data = subset(sample_CAbday_reg_duringAVR, new18_v3 == 1 & 
                                               hispanic==1))
ivasian<-ivreg(formula = vote_yes2018 ~ reg_preOct22 + populous_county + 
                 black + white + asian + hispanic + democrat + republican + 
                 third_party + age_at_2018election + female + nogender | bday_preOct22 + 
                 populous_county + black + white + asian + hispanic + democrat + 
                 republican + third_party + age_at_2018election + female + 
                 nogender, data = subset(sample_CAbday_reg_duringAVR, new18_v3 == 1 &
                                           asian==1))


iv18_23.sum<- summary( iv18_23, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
iv24_30.sum<- summary( iv24_30, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
iv31_40.sum<- summary( iv31_40, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
iv41_60.sum<- summary( iv41_60, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
iv61plus.sum<- summary( iv61plus, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
ivfemale.sum<- summary( ivfemale, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
ivmale.sum<- summary( ivmale, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
ivpop.sum<- summary( ivpop, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
ivnonpop.sum<- summary( ivnonpop, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
ivwhite.sum<- summary( ivwhite, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
ivblack.sum<- summary( ivblack, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
ivhispanic.sum<- summary( ivhispanic, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
ivasian.sum<- summary( ivasian, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)


iv18_23.rob<- coeftest( iv18_23, function(x) vcovHC(x, type="HC0"))
iv24_30.rob<- coeftest( iv24_30, function(x) vcovHC(x, type="HC0"))
iv31_40.rob<- coeftest( iv31_40, function(x) vcovHC(x, type="HC0"))
iv41_60.rob<- coeftest( iv41_60, function(x) vcovHC(x, type="HC0"))
iv61plus.rob<- coeftest( iv61plus, function(x) vcovHC(x, type="HC0"))
ivfemale.rob<- coeftest( ivfemale, function(x) vcovHC(x, type="HC0"))
ivmale.rob<- coeftest( ivmale, function(x) vcovHC(x, type="HC0"))
ivpop.rob<- coeftest( ivpop, function(x) vcovHC(x, type="HC0"))
ivnonpop.rob<- coeftest( ivnonpop, function(x) vcovHC(x, type="HC0"))
ivwhite.rob<- coeftest( ivwhite, function(x) vcovHC(x, type="HC0"))
ivblack.rob<- coeftest( ivblack, function(x) vcovHC(x, type="HC0"))
ivhispanic.rob<- coeftest( ivhispanic, function(x) vcovHC(x, type="HC0"))
ivasian.rob<- coeftest( ivasian, function(x) vcovHC(x, type="HC0"))


gaze.coeft <- function(x, col="Std. Error"){
  stopifnot(is.list(x))
  out <- lapply(x, function(y){
    y[ , col]
  })
  return(out)
}

gaze.lines.ivreg.diagn <- function(x, col="statistic", row=1:3, digits=1){
  stopifnot(is.list(x))
  out <- lapply(x, function(y){
    stopifnot(class(y)=="summary.ivreg")
    y$diagnostics[row, col, drop=FALSE]
  })
  out <- as.list(data.frame(t(as.data.frame(out)), check.names = FALSE))
  for(i in 1:length(out)){
    out[[i]] <- c(names(out)[i], round(out[[i]], digits=digits))
  }
  return(out)
}

stargazer(iv18_23,  iv24_30,  iv31_40,  iv41_60,  iv61plus,  ivfemale,  ivmale,  ivpop,  ivnonpop,  ivwhite,  ivblack,  ivhispanic,  ivasian,  keep="reg_preOct22",
          dep.var.labels =NULL, digits=2, column.labels= c("Aged 18-23", "Aged 24-30", "Aged 31-40", "Aged 41-60", "Aged 61 Plus", "Female", "Male", "Populous County", "Nonpopulous County", "White", "Black", "Hispanic", "Asian"),
          se = gaze.coeft(list(iv18_23.rob,  iv24_30.rob,  iv31_40.rob,  iv41_60.rob,  iv61plus.rob,  ivfemale.rob,  ivmale.rob,   ivpop.rob,  ivnonpop.rob,  ivwhite.rob,  ivblack.rob,  ivhispanic.rob,  ivasian.rob)), df=FALSE, omit.stat=c("adj.rsq","ser"),
          add.lines = gaze.lines.ivreg.diagn(list( iv18_23.sum,  iv24_30.sum,  iv31_40.sum,  iv41_60.sum,  iv61plus.sum,  ivfemale.sum,  ivmale.sum,  ivpop.sum,  ivnonpop.sum,  ivwhite.sum,  ivblack.sum,  ivhispanic.sum,  ivasian.sum), row=1), out="./PSRM Replication Files/California/Object Files/CA18_subgroupiv.tex")



rm(iv18_23,  iv24_30,  iv31_40,  iv41_60,  iv61plus,  ivfemale,  ivmale,  ivpop,  ivnonpop,  ivwhite,  ivblack,  ivhispanic,  ivasian, iv18_23.rob,  iv24_30.rob,  iv31_40.rob,  iv41_60.rob,  iv61plus.rob,  ivfemale.rob,  ivmale.rob,   ivpop.rob,  ivnonpop.rob,  ivwhite.rob,  ivblack.rob,  ivhispanic.rob,  ivasian.rob, iv18_23.sum,  iv24_30.sum,  iv31_40.sum,  iv41_60.sum,  iv61plus.sum,  ivfemale.sum,  ivmale.sum,  ivpop.sum,  ivnonpop.sum,  ivwhite.sum,  ivblack.sum,  ivhispanic.sum,  ivasian.sum)
gc()
#### Table 7

# Data preparation

# Existing registrants by birth year 


CA_file<-CA_file %>%
  mutate(filter2019=ifelse(reg_year==2019 & VoterStatusReasonCodeDesc=="New Registration" & vote_yes2018==0 & vote_yes2016==0, 1, 0)) %>%
  mutate(filter2018p_oct22=ifelse(reg_year==2018 & reg_preOct22==0 & VoterStatusReasonCodeDesc=="New Registration" & vote_yes2018==0 & vote_yes2016==0, 1, 0))

###limited data to registration occurring by October 22 but during AVR

registered_count <-CA_file %>%
  filter(filter2019!=1, filter2018p_oct22!=1) %>%
  count(birth_year, name="registered")

# CA Population by Birth year in 2018 (imputed estimates as of Jan 1)

population<-tibble::tribble(
  ~birth_year, ~population,
  1980L,      552669,
  1979L,      536136,
  1978L,      510614,
  1977L,    503369.5,
  1976L,    496823.5,
  1975L,    499036.5,
  1974L,      496937,
  1973L,    488428.5,
  1972L,    493882.5,
  1971L,      509878,
  1970L,      530975,
  1969L,    525319.5,
  1968L,    503191.5,
  1967L,    490716.5,
  1966L,    486115.5,
  1965L,    499894.5,
  1964L,      513661,
  1963L,    512766.5,
  1962L,      506726,
  1961L,      503099,
  1960L,      507927,
  1959L,      498124,
  1958L,    479834.5
)


# Calculate difference between registration and population in subset
population<-left_join(population, registered_count, by="birth_year")

population$unregistered<-population$population-population$registered

# Known characteristics
population$vote_yes2018<-0
population$AVR<-0
population$any_reg_2018<-0
population$democrat<-0
population$republican<-0
population$third_party<-0
population$not_registered<-1

# Create dataset with row for each unregistered person
unregistered_expanded <-expandRows(population, "unregistered")
unregistered_expanded <-unregistered_expanded %>%
  select(-registered, -population)

# Limit data to relevant age group and new registrants eligible to vote in Nov. General
CA_limit_full<-CA_file %>%
  filter(filter2019!=1, filter2018p_oct22!=1) %>%
  filter(birth_year>=1958, birth_year<=1980) %>%
  filter(reg_duringAVR==1)  %>%
  filter(new18_v3==1)

# Dummy variable for 2018 registration by any means

CA_limit_full$any_reg_2018<-1

# Add unregistered individuals to data
CA_limit_full<-CA_limit_full %>%
  bind_rows(CA_limit_full, unregistered_expanded)

# Create instrument
CA_limit_full<-CA_limit_full %>%
  mutate(high_prob=case_when(
    (2018-birth_year) %% 4 == 3  ~ 1, 
    TRUE~0)) 


# Create age variable

CA_limit_full<-CA_limit_full %>%
  mutate(age=2018-birth_year)

# Table 7 

CAiv_any_vote_new<-ivreg(vote_yes2018 ~  any_reg_2018 + age | high_prob + age  , data=subset(CA_limit_full))
summary(CAiv_any_vote_new, diagnostics = TRUE)

saveRDS(CAiv_any_vote_new, file="./PSRM Replication Files/California/Object Files/CAiv_any_vote_new_sensitivity.rds")

CAiv_any_vote_new.rob  <- coeftest(CAiv_any_vote_new, function(x) vcovHC(x, type="HC0"))
CAiv_any_vote_new.sum<- summary(CAiv_any_vote_new, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)

save(CAiv_any_vote_new,CAiv_any_vote_new.rob, CAiv_any_vote_new.sum, file = "./PSRM Replication Files/California/Object Files/CA iv sensitivity.RData")

### Figure 3 (California)


CA_regmethod <-CA_file %>%
  filter(reg_year==2018, RegistrationMethodCode=="Dl44" | RegistrationMethodCode=="RBM")

ggplot_highlight2018<-CA_file %>%
  filter(reg_year==2018) %>%
  filter(birth_year>=1959, birth_year<=1980, RegistrationMethodCode=="DL44" | RegistrationMethodCode=="RBM") %>%
  filter(birth_year %% 4 == 3) %>%
  group_by(birth_year) %>%
  summarize(count=n()) 

ggplot_highlight2019<-CA_file %>%
  filter(reg_year==2019) %>%
  filter(birth_year>=1959, birth_year<=1980, RegistrationMethodCode=="DL44" | RegistrationMethodCode=="RBM") %>%
  filter(birth_year %% 4 == 0) %>%
  group_by(birth_year) %>%
  summarize(count=n())

ggplot_full2019<-CA_file %>%
  filter(reg_year==2019) %>%
  filter(birth_year>=1932, birth_year<=1999, RegistrationMethodCode=="DL44" | RegistrationMethodCode=="RBM") %>%
  group_by(birth_year) %>%
  summarize(count=n())

CA_regmethod %>%
  filter(reg_year==2018) %>%
  filter(birth_year>=1934, birth_year<=1999, RegistrationMethodCode=="DL44" | RegistrationMethodCode=="RBM") %>%
  group_by(birth_year) %>%
  summarize(count=n()) %>%
  ggplot(aes(x = birth_year, y=count)) + ylab("Count Registered by AVR") + xlab("Birth Year") + ggtitle("California 2018") + geom_point() + geom_line() + geom_point(data=ggplot_highlight2018, aes(x = birth_year, y=count), size=3)

CA_file %>%
  filter(reg_year==2018) %>%
  filter(birth_year>=1934, birth_year<=1999, RegistrationMethodCode=="DL44" | RegistrationMethodCode=="RBM") %>%
  group_by(birth_year) %>%
  summarize(count=n()) %>%
  ggplot(aes(x = birth_year, y=count)) + ylab("Count Registered by AVR ") + xlab("Birth Year") + ggtitle("California 2018 and 2019") + geom_point() + geom_line() +
  geom_point(data=ggplot_highlight2018, aes(x = birth_year, y=count), size=3, shape="square") +
  geom_point(data=ggplot_highlight2019, aes(x = birth_year, y=count), size=3, shape="square") +
  geom_point(data=ggplot_full2019, aes(x = birth_year, y=count)) +
  geom_line(data=ggplot_full2019, aes(x = birth_year, y=count)) +
  geom_text(aes(x = 1948, y = 40000, label = "2018")) + 
  geom_text(aes(x = 1983, y = 18000, label = "2019")) + theme_minimal()


ggsave("./PSRM Replication Files/figures/figure_3_CA.png", height=1600, width=2600, units="px")

### Statistics presented in section entitled "Registration and Total Turnout Effect"


# How does birth year affect AVR vs Traditional Registration
CA_limit_full<-CA_limit_full %>%
  mutate(traditional=case_when(AVR==1 ~ 0,
                               not_registered==1 ~ 0,
                               TRUE ~1))


prop1_1_avr<-prop.test(AVR~high_prob, data=CA_limit_full, success=1)
firststage1_avr<-prop1_1_avr$estimate[2] - prop1_1_avr$estimate[1]
increase_avr<-(prop1_1_avr$estimate[1]-prop1_1_avr$estimate[2])/prop1_1_avr$estimate[2]
increase_avr
prop1_1_trad<-prop.test(traditional~high_prob, data=CA_limit_full, success=1)
firststage1_trad<-prop1_1_trad$estimate[2] - prop1_1_trad$estimate[1]
increase_trad<-(prop1_1_trad$estimate[1]-prop1_1_trad$estimate[2])/prop1_1_trad$estimate[2]
increase_trad
would_be_trad<-as.numeric(firststage1_trad/firststage1_avr)*-1
would_be_trad

# Count of Number of AVR Voters Between May 1 and Nov 1
AVR_count<-CA_file %>%
  filter(new18_v3==1, AVR==1) %>%
  filter(lub_regdate>=mdy(05012018) & lub_regdate<mdy(11012018)) %>%
  summarize(n=n())

AVR_count<-as.numeric(AVR_count)



###### Using high probability birth year instrument to calculate percent counterfactual motor voter

CA_file<-CA_file %>%
  mutate(high_prob=case_when(
    (2018-birth_year) %% 4 == 3  ~ 1, 
    TRUE~0)) 

CA_file <- CA_file %>%
  mutate(high_prob_reg=case_when(
    ((2018-birth_year) %% 4 == 3) & reg_year==2018  ~ 1, 
    ((2017-birth_year) %% 4 == 3) & reg_year==2017  ~ 1, 
    ((2016-birth_year) %% 4 == 3) & reg_year==2016  ~ 1, 
    ((2015-birth_year) %% 4 == 3) & reg_year==2015  ~ 1, 
    ((2014-birth_year) %% 4 == 3) & reg_year==2014  ~ 1, 
    ((2013-birth_year) %% 4 == 3) & reg_year==2013  ~ 1, 
    ((2012-birth_year) %% 4 == 3) & reg_year==2012  ~ 1, 
    ((2011-birth_year) %% 4 == 3) & reg_year==2011  ~ 1, 
    ((2010-birth_year) %% 4 == 3) & reg_year==2010  ~ 1, 
    ((2009-birth_year) %% 4 == 3) & reg_year==2009  ~ 1, 
    ((2008-birth_year) %% 4 == 3) & reg_year==2008  ~ 1, 
    TRUE~0)) 

CA_file <- CA_file %>%
  mutate(factor_reg_year=factor(reg_year))

CA_file <- CA_file %>%
  mutate(presidential_election=if_else(reg_year%%4==0, 1,0)) %>%
  mutate(midterm_election=if_else(reg_year%%4==2, 1, 0))


# Regression presented in Reviewer Material Appendix Table 1
logit_high_prob_reg<- glm(high_prob_reg ~ reg_duringAVR + midterm_election + 
                            presidential_election, family = binomial(link = "logit"), data = subset(CA_file, reg_year > 2007 & reg_year < 2019 & 
                                                                                                      birth_year > 1958 & birth_year < 1980 & VoterStatusReasonCodeDesc == 
                                                                                                      "New Registration"))
summary(logit_high_prob_reg)
logitmargins<-margins(logit_high_prob_reg)
logitpredict<-prediction(logit_high_prob_reg, at= list(reg_duringAVR=0))
logitpredict2<-prediction(logit_high_prob_reg, at= list(reg_duringAVR=1))

mean(logitpredict$fitted, na.rm=TRUE)
mean(logitpredict2$fitted, na.rm=TRUE)
mean(logitpredict$fitted, na.rm=TRUE)- mean(logitpredict2$fitted, na.rm=TRUE)

mean(logitpredict$fitted)-0.25
mean(logitpredict2$fitted)-0.25

would_be_motor<-(mean(logitpredict$fitted)-0.25)/(mean(logitpredict2$fitted)-0.25)
would_be_motor


### Calculating difference in voting rates with historical data and birthdate instrument



CA_file<-CA_file %>%
  mutate(vote_yes201X=case_when(reg_year=2018 & vote_yes2018==1~ 1, 
                                reg_year=2018 & vote_yes2018==0~ 0,
                                reg_year=2016 & vote_yes2016==1~ 1, 
                                reg_year=2016 & vote_yes2016==0~ 0,
                                reg_year=2014 & vote_yes2014==1~ 1,
                                reg_year=2014 & vote_yes2014==0~ 0,
                                TRUE ~ NA_real_))


deadlineNov2016<-yday(ymd(2011024))
CA_file$reg2016_preOct24<-ifelse(CA_file$reg_day_of_year<=deadlineNov2016 & CA_file$reg_year==2016,1,0)

CA_file<-CA_file %>%
  mutate(reg201X_bydeadline=case_when(reg2014_preOct20==1 & reg_year==2014~1,
                                      reg2014_preOct20==0 & reg_year==2014~0,
                                      reg2016_preOct24==1 & reg_year==2016~1,
                                      reg2016_preOct24==0 & reg_year==2016~0,
                                      reg_preOct22==1 & reg_year==2018~1,
                                      reg_preOct22==0 & reg_year==2018~0,
  ))


CA_file<-CA_file %>%
  mutate(windowX=case_when(reg_year==2018 & (birth_day_of_year<=(deadlineNov2018+15) & birth_day_of_year>=(deadlineNov2018-15)) & leap_year(lub_birthdate)==TRUE ~ 1,
                           reg_year==2018 & (birth_day_of_year<=(deadlineNov2018+15-1) & birth_day_of_year>=(deadlineNov2018-15-1)) & leap_year(lub_birthdate)==FALSE ~ 1,
                           reg_year==2016 & (birth_day_of_year<=(deadlineNov2016+15) & birth_day_of_year>=(deadlineNov2016-15)) & leap_year(lub_birthdate)==TRUE ~ 1,
                           reg_year==2016 & (birth_day_of_year<=(deadlineNov2016+15-1) & birth_day_of_year>=(deadlineNov2016-15-1)) & leap_year(lub_birthdate)==FALSE ~ 1,
                           reg_year==2014 & (birth_day_of_year<=(deadlineNov2014+15) & birth_day_of_year>=(deadlineNov2014-15)) & leap_year(lub_birthdate)==TRUE ~ 1,
                           reg_year==2014 & (birth_day_of_year<=(deadlineNov2014+15-1) & birth_day_of_year>=(deadlineNov2014-15-1)) & leap_year(lub_birthdate)==FALSE ~ 1,
  ))


CA_file<-CA_file %>%
  mutate(early_bdaygroupX=case_when(reg_year==2014 & birth_day_of_year>deadlineNov2014 & leap_year(lub_birthdate)==TRUE ~ 0, 
                                    reg_year==2014 & birth_day_of_year+1>deadlineNov2014 & leap_year(lub_birthdate)==FALSE ~ 0, 
                                    reg_year==2016 & birth_day_of_year>deadlineNov2016 & leap_year(lub_birthdate)==TRUE ~ 0, 
                                    reg_year==2016 & birth_day_of_year+1>deadlineNov2016 & leap_year(lub_birthdate)==FALSE ~ 0, 
                                    reg_year==2018 & birth_day_of_year>deadlineNov2018 & leap_year(lub_birthdate)==TRUE ~ 0, 
                                    reg_year==2018 & birth_day_of_year+1>deadlineNov2018 & leap_year(lub_birthdate)==FALSE ~ 0, 
                                    TRUE ~ 1))

CA_file<-CA_file %>%
  mutate(over18X=case_when(reg_year==2014 & age_at_2018election>=22 ~ 1,
                           reg_year==2016 & age_at_2018election>=20 ~ 1,
                           reg_year==2018 & age_at_2018election>=18 ~ 1))

CA_file<-CA_file %>%
  mutate(post_AVR= case_when(reg_year==2014 ~ 0,
                             reg_year==2016  ~ 0,
                             reg_year==2018  ~ 1))

CA_file$reg201X_bydeadline<-as.factor(CA_file$reg201X_bydeadline)


CA_file<-CA_file %>%
  mutate(newX= case_when(reg_year==2014 & VoterStatusReasonCodeDesc=="New Registration" & vote_yes2012==0  ~ 1,
                         reg_year==2016 & VoterStatusReasonCodeDesc=="New Registration" & vote_yes2014==0  ~ 1,
                         reg_year==2018 & VoterStatusReasonCodeDesc=="New Registration" & vote_yes2016==0  ~ 1))


CAreg_postApril23<-CA_file %>%
  filter(yday(lub_regdate)>=113)  %>%
  filter(reg_year==2014 | reg_year==2016 | reg_year==2018) %>%
  filter(windowX==1, over18X==1)


### IV regression in Reviewer Material Appendix, Table 2

CAiv_diffdiff_new<-ivreg(vote_yes201X ~ reg201X_bydeadline*post_AVR + populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2018election   | early_bdaygroupX*post_AVR + populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2018election , data=subset(CAreg_postApril23, (reg_year==2014 | reg_year==2016 | reg_year==2018) & windowX==1 & over18X==1 & VoterStatusReasonCodeDesc=="New Registration"))

CAiv_diffdiff_new.rob  <- coeftest(CAiv_diffdiff_new, function(x) vcovHC(x, type="HC0"))
CAiv_diffdiff_new.sum<- summary(CAiv_diffdiff_new, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
CAiv_diffdiff_new.sum
save(CAiv_diffdiff_new, CAiv_diffdiff_new.rob, CAiv_diffdiff_new.sum, file = "./PSRM Replication Files/California/Object Files/CA iv diffdiff new.RData")


# Calculate difference in voting rates (motor voter vs AVR)

diffdiff_predictAVR<-prediction(CAiv_diffdiff_new, at = list(post_AVR = 1, reg201X_bydeadline="1"))
diffdiff_predictmotor<-prediction(CAiv_diffdiff_new, at = list(post_AVR = 0, reg201X_bydeadline="1"))

motor_turnout_diff<-mean(diffdiff_predictmotor$fitted)-mean(diffdiff_predictAVR$fitted)
motor_turnout_diff

#logit_diffdiff_margin1<-prediction(CAiv_diffdiff_new, at= list(c(post_AVR=0, reg201X_bydeadline=="1")))
#logit_diffdiff_margin2<-prediction(CAiv_diffdiff_new, at= list(c(post_AVR=1, reg201X_bydeadline=="1")))

#motor_turnout_diff<-mean(logit_diffdiff_margin1$fitted)-mean(logit_diffdiff_margin2$fitted)
#motor_turnout_diff


# final turnout calculation

only_reg_avr<-(AVR_count*(1-would_be_trad))*(1-would_be_motor)
only_reg_avr
turnout_rate<-as.numeric(ivnew$coefficients[2])
turnout_rate
turnout_rate_less_motor<-(turnout_rate-(motor_turnout_diff+turnout_rate)*would_be_motor)/(1-would_be_motor)
turnout_rate_less_motor
AVR_voter_calc<-only_reg_avr*turnout_rate_less_motor
AVR_voter_calc
AVR_voter_calc_VEP<-AVR_voter_calc/VEP
AVR_voter_calc_VEP





sink() # final line here
